# Dataset selection.
dataset:
  name: super_glue.multirc
  # Explicit null (was a bare empty value): a dataset in huggingface
  # doesn't need a path.
  path: null

# Backbone model and the optimizer settings applied to it.
plm:
  model_name: t5
  model_path: t5-large
  optimize:
    # NOTE(review): presumably keeps the backbone weights frozen so that only
    # the template parameters are trained -- confirm against the consumer.
    freeze_para: true
    lr: 1.0e-5
    weight_decay: 0.0
    scheduler:
      # Explicit null (was a bare empty value): no scheduler type is set here.
      type: null
      num_warmup_steps: 500

# Input construction / truncation settings.
dataloader:
  # Maximum sequence length.
  max_seq_length: 512
  # The decoder max length used to truncate the decoder input sequence if the
  # model is an encoder-decoder architecture. Note that it is not equivalent
  # to generation.max_length, which is used merely in the generation phase.
  decoder_max_length: 3
  # Choose from: balanced, head, tail.
  truncate_method: 'head'
  decode_from_pad: true

# Training settings.
train:
  batch_size: 4
  gradient_accumulation_steps: 1
  max_grad_norm: 1.0
  # Explicit null (was a bare empty value); only num_training_steps is set.
  # NOTE(review): presumably the step budget below bounds training when
  # num_epochs is null -- confirm against the consumer.
  num_epochs: null
  num_training_steps: 30000


# Batch size used for `test`; set independently of train.batch_size.
test:
  batch_size: 16

# Batch size used for `dev`; set independently of train.batch_size.
dev:
  batch_size: 16


# Template / verbalizer selectors. Each value names a top-level section of
# this file (`soft_template`, `manual_verbalizer`) that holds its settings.
template: soft_template
verbalizer: manual_verbalizer



# Settings for the section named by `template: soft_template`.
soft_template:
  # NOTE(review): presumably the index of the template to pick from
  # file_path -- confirm against the consumer.
  choice: 0
  file_path: scripts/SuperGLUE/MultiRC/soft_template.txt
  num_tokens: 20
  initialize_from_vocab: true
  random_range: 0.5
  # Optimizer settings for the template itself; note that this lr (0.3) is
  # configured separately from plm.optimize.lr (1.0e-5).
  optimize:
    name: AdamW
    lr: 0.3
    adam_epsilon: 1.0e-8
    scheduler:
      num_warmup_steps: 500


# Settings for the section named by `verbalizer: manual_verbalizer`.
manual_verbalizer:
  # NOTE(review): presumably the index of the verbalizer to pick from
  # file_path -- confirm against the consumer.
  choice: 0
  file_path: scripts/SuperGLUE/MultiRC/manual_verbalizer.txt
  
# Runtime / device settings.
environment:
  num_gpus: 3
  # Explicit null (was a bare empty value).
  cuda_visible_devices: null
  local_rank: 0
  model_parallel: true
  # Explicit null (was a bare empty value).
  device_map: null

# NOTE(review): presumably "full" means training on the full dataset (as
# opposed to a few-shot / zero-shot setting) -- confirm against the consumer.
learning_setting: full